rm(list=ls())

# Load packages
library(haven)
library(tidyverse)
library(data.table)
library(binsreg)
library(lfe)
library(gravity)
library(stargazer)

source("code/config.R")
source(paste0(CODE,"binscatter2.R"))
source(paste0(CODE,"parameters.R"))

df<-read_csv(paste(DATA, "clean_data.csv", sep=""))

# Gravity regressions
out_w <- felm(log(share_commute) ~ 1 | sub_area_d + sub_area_o |
              (t ~ spatial_dist), df)
out_w_w <- felm(log(share_commute_w) ~ 1 | sub_area_d + sub_area_o |
              (t ~ spatial_dist), df)
out_l <- felm(log(share_leisure) ~ 1 | sub_area_d + sub_area_o |
              (t ~ spatial_dist), df)
out_l_w <- felm(log(share_leisure_w) ~ 1 | sub_area_d + sub_area_o |
              (t ~ spatial_dist), df)

# Output results: Table 1
stargazer(out_w,out_w_w,out_l,out_l_w)

# Save fixed effects
out_w_fe <- getfe(out_w)
out_w_w_fe <- getfe(out_w_w)
out_w_fe <- out_w_fe %>% filter(fe=="sub_area_o")%>% select(effect,sub_area_o=idx)
out_w_w_fe <- out_w_w_fe %>% filter(fe=="sub_area_o")%>% select(effect_w=effect,sub_area_o=idx)
temp<-df %>% select(sub_area_o,avg_wage,avg_wage_w,share_low) %>% distinct()
out_w_fe<-left_join(out_w_fe,out_w_w_fe)
out_w_fe<-left_join(out_w_fe,temp)


# Gravity estimation in differences
out_w <- felm(log(share_commute) ~ t_diff | sub_area_d + sub_area_o , df)
out_w_w <- felm(log(share_commute_w) ~ t_diff | sub_area_d + sub_area_o , df)
out_l <- felm(log(share_leisure) ~ t_diff | sub_area_d + sub_area_o , df)
out_l_w <- felm(log(share_leisure_w) ~ t_diff | sub_area_d + sub_area_o , df)
# Output results: Table A6
stargazer(out_w,out_w_w,out_l,out_l_w)

# Regressing the estimated residence fixed effects, µˆn(L,θ), on the log of average residential income

top_4<-c("BUKIT TIMAH", "NOVENA","MARINE PARADE","BISHAN") 
top_4_w<-c("WOODLANDS", "SENGKANG", "BUKIT PANJANG","YISHUN")
results_w1<-lm(-effect~log(avg_wage),out_w_fe)
results_w_w1<-(lm(-effect_w~log(avg_wage_w),out_w_fe))
results_w<-lm(-effect~log(avg_wage),out_w_fe[!(out_w_fe$sub_area_o %in% top_4),])
results_w_w<-(lm(-effect_w~log(avg_wage_w),out_w_fe[!(out_w_fe$sub_area_o %in% top_4_w),]))

# Output results: Table A8
stargazer(results_w1,results_w,results_w_w1,results_w_w)

# Generate binscatter
bins1<-binsreg(y=-out_w_fe$effect,x=log(out_w_fe$avg_wage))
temp<-bins1$data.plot
temp<-temp$`Group Full Sample`$data.dots
ggplot(data=temp,aes(x=x,y=fit))+ geom_point() + geom_abline(intercept = -36.210/2.912  , slope = 4.878/2.912, color="red") + geom_abline(intercept = -79.60/2.912  , slope = 9.897/2.912, color="blue") +xlab("Log Residential Income (Data)") +ylab("Residence Fixed Effects (Commuting Gravity Equation)")

# Save estimated parameters
df$phi_l<-as.numeric(out_w$beta[1])
df$phi_l_w<-as.numeric(out_w_w$beta[1])

df$phi_s<-as.numeric(out_l$beta[1])
df$phi_s_w<-as.numeric(out_l_w$beta[1])

df$epilson_l<-as.numeric(results_w$coefficients[2])
df$epilson_l_w<-as.numeric(results_w_w$coefficients[2])

df$kappa<-df$phi_l/df$epilson_l
df$kappa_w<-df$phi_l_w/df$epilson_l_w

df$epilson_s<-df$phi_s/df$kappa
df$epilson_s_w<-df$phi_s_w/df$kappa_w

# Save fixed effects
out_w_fe2 <- getfe(out_w)
out_w_w_fe2 <- getfe(out_w_w)
out_l_fe2 <- getfe(out_l)
out_l_w_fe2 <- getfe(out_l_w)

out_w_fe2_o<- out_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_L=effect,sub_area_o=idx)
out_w_fe2_d<- out_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_L=effect,sub_area_d=idx)

out_w_w_fe2_o<- out_w_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_L_w=effect,sub_area_o=idx)
out_w_w_fe2_d<- out_w_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_L_w=effect,sub_area_d=idx)

out_l_fe2_o<- out_l_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_S=effect,sub_area_o=idx)
out_l_fe2_d<- out_l_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_S=effect,sub_area_d=idx)

out_l_w_fe2_o<- out_l_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_S_w=effect,sub_area_o=idx)
out_l_w_fe2_d<- out_l_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_S_w=effect,sub_area_d=idx)

df<-left_join(df,out_w_fe2_o)
df<-left_join(df,out_w_fe2_d)
df<-left_join(df,out_w_w_fe2_o)
df<-left_join(df,out_w_w_fe2_d)
df<-left_join(df,out_l_fe2_o)
df<-left_join(df,out_l_fe2_d)
df<-left_join(df,out_l_w_fe2_o)
df<-left_join(df,out_l_w_fe2_d)

write_csv(df,paste(DATA, "clean_data2.csv", sep=""))

# Create heatmap: Figure A7
df$trips<-df$leisure+df$leisure_w
ggplot(df, aes(sub_area_o, sub_area_d, fill= (log(trips)))) + scale_fill_viridis(discrete=FALSE) +
  geom_tile()+ labs(fill = "Log Trips")+ xlab("Origin") + ylab("Destination") +
  geom_tile()+ labs(title="Heat Map of Log Weekend Trips on Public Transit")+ 
  theme(
    axis.text.x = element_text(
      angle = 90,
      hjust = 1,
      vjust = 0.5 ))

# Merge in amenities data
amenitiesWithNeighbors <- read_csv("data/amenitiesWithNeighbors.csv")
mpsz <- st_read(dsn = paste0(DATA,"master-plan-2014-subzone-boundary-web-shp"), 
                layer = "MP14_SUBZONE_WEB_PL")
mpsz<-mpsz %>% select(SUBZONE_N,SHAPE_Area)
df$SUBZONE_N<-df$sub_area_d
df<- df %>% filter( !is.na(des_fe_S) & !is.na(des_fe_S_w)) %>% select(SUBZONE_N,des_fe_S,des_fe_S_w) %>% distinct()
df_o<-left_join(df,amenitiesWithNeighbors)
df_o<-left_join(df_o,mpsz)

# Regressions: Table A9
r1<-(lm((des_fe_S)~scale(food), df_o ))
r2<-(lm((des_fe_S)~scale(hawker_stall/food), df_o ))
r3<-(lm((des_fe_S)~scale(food_court/food), df_o ))
r5<-(lm((des_fe_S)~scale(supermarkets), df_o ))
r6<-(lm((des_fe_S)~scale(clinics), df_o ))
r11<-(lm((des_fe_S_w)~scale(food), df_o ))
r21<-(lm((des_fe_S_w)~scale(hawker_stall/food), df_o ))
r31<-(lm((des_fe_S_w)~scale(food_court/food), df_o ))
r51<-(lm((des_fe_S_w)~scale(supermarkets), df_o ))
r61<-(lm((des_fe_S_w)~scale(clinics), df_o ))
stargazer(r1,r11,r2,r21,r3,r31,r5,r51,r6,r61)

# Employment density Regressions: Table A7
r2<-(lm(scale((des_fe_L))~scale((workers+workers_w)/SHAPE_Area), df_o ))
r1<-(lm(scale((des_fe_L_w))~scale( (workers+workers_w)/SHAPE_Area), df_o ))
stargazer(r2,r1)


# Repeat for post period data

df<-read_csv(paste(DATA, "clean_data_post.csv", sep=""))

out_w <- felm(log(share_commute) ~ 1 | sub_area_d + sub_area_o |
                (t ~ spatial_dist), df)
out_w_w <- felm(log(share_commute_w) ~ 1 | sub_area_d + sub_area_o |
                  (t ~ spatial_dist), df)
out_l <- felm(log(share_leisure) ~ 1 | sub_area_d + sub_area_o |
                (t ~ spatial_dist), df)
out_l_w <- felm(log(share_leisure_w+0.00001) ~ 1 | sub_area_d + sub_area_o |
                  (t ~ spatial_dist), df)

out_w_fe <- getfe(out_w)
out_w_w_fe <- getfe(out_w_w)

out_w_fe2 <- getfe(out_w)
out_w_w_fe2 <- getfe(out_w_w)
out_l_fe2 <- getfe(out_l)
out_l_w_fe2 <- getfe(out_l_w)

out_w_fe2_o<- out_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_L=effect,sub_area_o=idx)
out_w_fe2_d<- out_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_L=effect,sub_area_d=idx)

out_w_w_fe2_o<- out_w_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_L_w=effect,sub_area_o=idx)
out_w_w_fe2_d<- out_w_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_L_w=effect,sub_area_d=idx)

out_l_fe2_o<- out_l_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_S=effect,sub_area_o=idx)
out_l_fe2_d<- out_l_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_S=effect,sub_area_d=idx)

out_l_w_fe2_o<- out_l_w_fe2 %>% filter(fe=="sub_area_o")%>% select(res_fe_S_w=effect,sub_area_o=idx)
out_l_w_fe2_d<- out_l_w_fe2 %>% filter(fe=="sub_area_d")%>% select(des_fe_S_w=effect,sub_area_d=idx)

df<-left_join(df,out_w_fe2_o)
df<-left_join(df,out_w_fe2_d)
df<-left_join(df,out_w_w_fe2_o)
df<-left_join(df,out_w_w_fe2_d)
df<-left_join(df,out_l_fe2_o)
df<-left_join(df,out_l_fe2_d)
df<-left_join(df,out_l_w_fe2_o)
df<-left_join(df,out_l_w_fe2_d)

write_csv(df,paste(DATA, "clean_data_post2.csv", sep=""))

# Gravity estimation in differences
out_w <- felm(log(share_commute) ~ t_diff | sub_area_d + sub_area_o , df)
out_w_w <- felm(log(share_commute_w) ~ t_diff | sub_area_d + sub_area_o , df)
out_l <- felm(log(share_leisure) ~ t_diff | sub_area_d + sub_area_o , df)
out_l_w <- felm(log(share_leisure_w) ~ t_diff | sub_area_d + sub_area_o , df)

